library(ggplot2)
library(ezids)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(readr)
library(tidyr)
library(lubridate)
##
## Attaching package: 'lubridate'
## The following objects are masked from 'package:base':
##
## date, intersect, setdiff, union
library(knitr)
library(magrittr)
##
## Attaching package: 'magrittr'
## The following object is masked from 'package:tidyr':
##
## extract
# Read the CSV files of adult arrests in the DC area for 2016-2021.
# Load one CSV of arrest records per year. read.csv() already returns a
# data.frame, so the result is stored directly.
df_2016 <- read.csv("Arrests 2016 Public.csv")
df_2017 <- read.csv("Arrests 2017 Public.csv")
df_2018 <- read.csv("Arrests by Year, 2018.csv")
df_2019 <- read.csv("Arrests by Year, 2019.csv")
df_2020 <- read.csv("Arrests by Year 2020.csv")
df_2021 <- read.csv("2021 Adult Arrests.csv")
# Stack the 2016 and 2017 files into one frame.
# Their column names differ from each other, so the 2016 frame is given the
# 2017 names first -- this assumes both files list the same fields in the same
# order (TODO confirm against the raw files).
names(df_2016) <- names(df_2017)
df_16_17 <- rbind(df_2016, df_2017)
# Drop columns (by position) that this analysis does not use; the district
# location columns are kept for now.
df_16_17 <- df_16_17[-c(5, 6, 15, 17:21, 23:26)]
# Align the offense-district column name with the one used in 2018-2021.
df_16_17 <- rename(df_16_17, "Offense.Location.District" = "Offense.District")
# Replace every dot in the column names with an underscore.
names(df_16_17) <- gsub(".", "_", names(df_16_17), fixed = TRUE)
#colnames(df_16_17)
# The 2018-2021 files share the same column names, so they are stacked
# directly and the same positional columns are dropped as for 2016-2017.
df_18_21 <- rbind(df_2018, df_2019, df_2020, df_2021)
df_18_21 <- df_18_21[, -c(5, 6, 15, 17:21, 23:26)]
# Replace every dot in the column names with an underscore.
names(df_18_21) <- gsub(".", "_", names(df_18_21), fixed = TRUE)
#colnames(df_18_21)
# Combine both periods into a single table covering 2016-2021.
df_full <- rbind(df_16_17, df_18_21)
# Show the resulting column names.
names(df_full)
## [1] "Arrestee_Type" "Arrest_Year"
## [3] "Arrest_Date" "Arrest_Hour"
## [5] "Age" "Defendant_PSA"
## [7] "Defendant_District" "Defendant_Race"
## [9] "Defendant_Ethnicity" "Defendant_Sex"
## [11] "Arrest_Category" "Charge_Description"
## [13] "Arrest_Location_District" "Offense_Location_District"
# Inspect the distinct values of the race, sex and arrest-category columns.
#unique(df_full$Defendant_Race)
#unique(df_full$Defendant_Sex)
#unique(df_full$Arrest_Category)
# "UNK" is most likely the same as "UNKNOWN", so fold it into that label.
df_full$Defendant_Race <- replace(
  df_full$Defendant_Race,
  df_full$Defendant_Race == "UNK",
  "UNKNOWN"
)
#unique(df_full$Defendant_Race) - check that it changed
# Same treatment for the sex column.
df_full$Defendant_Sex <- replace(
  df_full$Defendant_Sex,
  df_full$Defendant_Sex == "UNK",
  "UNKNOWN"
)
#unique(df_full$Defendant_Sex) - check that it changed
# Arrest category: the several "Fraud and Financial Crimes ..." and
# "Release Violations/Fugitive ..." variants are collapsed to their shared
# prefix by replacing the prefix plus any trailing text with the prefix alone.
df_full$Arrest_Category <- gsub(
  "Fraud and Financial Crimes.*",
  "Fraud and Financial Crimes",
  df_full$Arrest_Category
)
df_full$Arrest_Category <- gsub(
  "Release Violations/Fugitive.*",
  "Release Violations/Fugitive",
  df_full$Arrest_Category
)
#sort(unique(df_full$Arrest_Category)) - check that new changes were made.
# Count the missing values in every column.
vapply(df_full, function(column) sum(is.na(column)), integer(1))
## Arrestee_Type Arrest_Year Arrest_Date
## 0 0 0
## Arrest_Hour Age Defendant_PSA
## 0 0 29143
## Defendant_District Defendant_Race Defendant_Ethnicity
## 9349 0 0
## Defendant_Sex Arrest_Category Charge_Description
## 0 12 15
## Arrest_Location_District Offense_Location_District
## 185 11
# Split Arrest_Date on "-" into Year / Month / Day text columns while keeping
# the original Arrest_Date column (remove = FALSE). Values with fewer than
# three pieces are padded with NA on the left (fill = "left").
df_full <- separate(
  df_full,
  col = Arrest_Date,
  into = c("Year", "Month", "Day"),
  sep = "-",
  remove = FALSE,
  fill = "left"
)
# The new Year column duplicates Arrest_Year, so remove it. It is dropped by
# name rather than by position so a change in column order cannot silently
# delete a different column.
df_full$Year <- NULL
colnames(df_full)
## [1] "Arrestee_Type" "Arrest_Year"
## [3] "Arrest_Date" "Month"
## [5] "Day" "Arrest_Hour"
## [7] "Age" "Defendant_PSA"
## [9] "Defendant_District" "Defendant_Race"
## [11] "Defendant_Ethnicity" "Defendant_Sex"
## [13] "Arrest_Category" "Charge_Description"
## [15] "Arrest_Location_District" "Offense_Location_District"
# Convert the categorical columns to factors.
# Day is intentionally not in this list: it is rebuilt from Arrest_Date below,
# so converting the text value to a factor first would simply be overwritten.
factor_cols <- c(
  "Arrest_Year", "Month", "Defendant_Race", "Defendant_Sex",
  "Arrest_Location_District", "Offense_Location_District", "Arrest_Category"
)
df_full[factor_cols] <- lapply(df_full[factor_cols], as.factor)
# Parse the arrest date text into Date values.
df_full$Arrest_Date <- as.Date(df_full$Arrest_Date)
# Day of the month as a number, taken from the parsed date.
df_full$Day <- day(df_full$Arrest_Date)
# Weekday name as a factor whose levels run Monday through Sunday
# (wday() numbers Sunday as 1, hence the c(2:7, 1) ordering).
df_full$Weekday <- factor(
  weekdays(df_full$Arrest_Date),
  levels = as.character(wday(c(2:7, 1), label = TRUE, abbr = FALSE))
)
# Number of arrest records for each value of Arrest_Hour, printed and then
# drawn as a line coloured by hour.
by_hour <- dplyr::summarise(group_by(df_full, Arrest_Hour), Total = n())
by_hour
## # A tibble: 24 × 2
## Arrest_Hour Total
## <int> <int>
## 1 0 5684
## 2 1 7431
## 3 2 6774
## 4 3 6288
## 5 4 5427
## 6 5 4416
## 7 6 4469
## 8 7 6101
## 9 8 6731
## 10 9 6594
## # … with 14 more rows
ggplot(by_hour, aes(x = Arrest_Hour, y = Total, colour = Arrest_Hour)) +
  geom_line() +
  labs(title = "Crimes By Hour", x = "Hour of the Day", y = "Total Crimes")
# Number of arrest records for each day of the month, printed and then drawn
# as a line coloured by day.
by_day <- dplyr::summarise(group_by(df_full, Day), Total = n())
by_day
## # A tibble: 31 × 2
## Day Total
## <int> <int>
## 1 1 5518
## 2 2 5195
## 3 3 5242
## 4 4 5138
## 5 5 5192
## 6 6 5000
## 7 7 4880
## 8 8 5027
## 9 9 4949
## 10 10 5091
## # … with 21 more rows
ggplot(by_day, aes(x = Day, y = Total, colour = Day)) +
  geom_line() +
  labs(title = "Crimes By Day", x = "Day of the Month", y = "Total Crimes")
# Arrest records per weekday, with each weekday's share of all rows in
# df_full expressed as a percentage.
by_weekday <- df_full %>%
  group_by(Weekday) %>%
  dplyr::summarise(Total = n()) %>%
  dplyr::mutate(Percent = Total / nrow(df_full) * 100)
by_weekday
## # A tibble: 7 × 3
## Weekday Total Percent
## <fct> <int> <dbl>
## 1 Monday 19560 12.8
## 2 Tuesday 21432 14.1
## 3 Wednesday 23529 15.4
## 4 Thursday 23246 15.3
## 5 Friday 23198 15.2
## 6 Saturday 22148 14.5
## 7 Sunday 19273 12.6
# One bar per weekday; the legend is hidden because the fill repeats the axis.
ggplot(by_weekday, aes(x = Weekday, y = Total, fill = Weekday)) +
  geom_col() +
  labs(title = "Crimes By Weekday ", x = "Day of the Week", y = "Count") +
  theme(legend.position = "none")
# Arrest records per calendar month, with each month's share of all rows in
# df_full expressed as a percentage.
by_month <- df_full %>%
  group_by(Month) %>%
  dplyr::summarise(Total = n()) %>%
  dplyr::mutate(Percent = Total / nrow(df_full) * 100)
by_month
## # A tibble: 12 × 3
## Month Total Percent
## <fct> <int> <dbl>
## 1 01 12757 8.37
## 2 02 12163 7.98
## 3 03 13631 8.95
## 4 04 12351 8.11
## 5 05 13430 8.81
## 6 06 12729 8.35
## 7 07 13011 8.54
## 8 08 12997 8.53
## 9 09 12583 8.26
## 10 10 13037 8.56
## 11 11 11871 7.79
## 12 12 11826 7.76
# Bar chart of the monthly totals (legend hidden).
ggplot(by_month, aes(x = Month, y = Total, fill = Month)) +
  geom_col() +
  labs(title = "Crimes By Month", x = "Month", y = "Count") +
  theme(legend.position = "none")
# Same totals as a single connected line; group = 1 joins the factor levels.
ggplot(by_month, aes(x = Month, y = Total, group = 1)) +
  geom_line()
#### Arrest records grouped by year, with yearly plots.
by_year <- df_full %>%
  group_by(Arrest_Year) %>%
  dplyr::summarise(Total = n()) %>%
  dplyr::mutate(Percent = Total / nrow(df_full) * 100)
by_year
## # A tibble: 6 × 3
## Arrest_Year Total Percent
## <fct> <int> <dbl>
## 1 2016 29980 19.7
## 2 2017 31209 20.5
## 3 2018 29115 19.1
## 4 2019 27938 18.3
## 5 2020 18491 12.1
## 6 2021 15653 10.3
# Bar chart of the yearly totals (legend hidden).
ggplot(by_year, aes(x = Arrest_Year, y = Total, fill = Arrest_Year)) +
  geom_col() +
  labs(title = "Crimes By Year ", x = "Year", y = "Count") +
  theme(legend.position = "none")
# Same totals as a single connected line; group = 1 joins the factor levels.
ggplot(by_year, aes(x = Arrest_Year, y = Total, group = 1)) +
  geom_line()
#unique(df_full$Defendant_Race)
#table(df_full$Defendant_Sex)
# Keep only the rows recorded as race "WHITE" and sex "MALE". which() drops
# rows where the comparison is NA, and the original row names are preserved.
df_wm <- df_full[
  which(df_full$Defendant_Race == "WHITE" & df_full$Defendant_Sex == "MALE"),
]
head(df_wm, n = 20)
## Arrestee_Type Arrest_Year Arrest_Date Month Day Arrest_Hour Age
## 1 Adult Arrest 2016 2016-01-01 01 1 0 39
## 2 Adult Arrest 2016 2016-01-01 01 1 0 27
## 12 Adult Arrest 2016 2016-01-01 01 1 1 27
## 14 Adult Arrest 2016 2016-01-01 01 1 1 26
## 24 Adult Arrest 2016 2016-01-01 01 1 13 48
## 54 Adult Arrest 2016 2016-01-01 01 1 2 25
## 76 Adult Arrest 2016 2016-01-01 01 1 3 21
## 84 Adult Arrest 2016 2016-01-01 01 1 3 41
## 96 Adult Arrest 2016 2016-01-01 01 1 6 29
## 98 Adult Arrest 2016 2016-01-01 01 1 7 22
## 104 Adult Arrest 2016 2016-01-02 01 2 0 51
## 110 Adult Arrest 2016 2016-01-02 01 2 1 29
## 114 Adult Arrest 2016 2016-01-02 01 2 11 64
## 123 Adult Arrest 2016 2016-01-02 01 2 15 33
## 131 Adult Arrest 2016 2016-01-02 01 2 16 23
## 138 Adult Arrest 2016 2016-01-02 01 2 17 49
## 161 Adult Arrest 2016 2016-01-02 01 2 21 30
## 171 Adult Arrest 2016 2016-01-02 01 2 3 22
## 175 Adult Arrest 2016 2016-01-02 01 2 4 28
## 194 Adult Arrest 2016 2016-01-03 01 3 15 27
## Defendant_PSA Defendant_District Defendant_Race Defendant_Ethnicity
## 1 Out of State Out of State WHITE UNKNOWN
## 2 Out of State Out of State WHITE NOT HISPANIC
## 12 Out of State Out of State WHITE HISPANIC
## 14 Out of State Out of State WHITE NOT HISPANIC
## 24 404 4D WHITE NOT HISPANIC
## 54 Out of State Out of State WHITE HISPANIC
## 76 Out of State Out of State WHITE HISPANIC
## 84 307 3D WHITE HISPANIC
## 96 Out of State Out of State WHITE HISPANIC
## 98 402 4D WHITE HISPANIC
## 104 Out of State Out of State WHITE NOT HISPANIC
## 110 Out of State Out of State WHITE HISPANIC
## 114 Out of State Out of State WHITE UNKNOWN
## 123 302 3D WHITE HISPANIC
## 131 506 5D WHITE HISPANIC
## 138 Out of State Out of State WHITE HISPANIC
## 161 Out of State Out of State WHITE NOT HISPANIC
## 171 Out of State Out of State WHITE HISPANIC
## 175 201 2D WHITE NOT HISPANIC
## 194 403 4D WHITE HISPANIC
## Defendant_Sex Arrest_Category
## 1 MALE Simple Assault
## 2 MALE Simple Assault
## 12 MALE Driving/Boating While Intoxicated
## 14 MALE Simple Assault
## 24 MALE Simple Assault
## 54 MALE Simple Assault
## 76 MALE Liquor Law Violations
## 84 MALE Driving/Boating While Intoxicated
## 96 MALE Simple Assault
## 98 MALE Simple Assault
## 104 MALE Release Violations/Fugitive
## 110 MALE Traffic Violations
## 114 MALE Simple Assault
## 123 MALE Assault with a Dangerous Weapon
## 131 MALE Weapon Violations
## 138 MALE Liquor Law Violations
## 161 MALE Narcotics
## 171 MALE Simple Assault
## 175 MALE Damage to Property
## 194 MALE Traffic Violations
## Charge_Description
## 1 Threats To Do Bodily Harm -misd
## 2 Simple Assault
## 12 Driving While Intoxicated -2nd Off
## 14 Simple Assault
## 24 Simple Assault
## 54 Simple Assault
## 76 Poss Of Open Container Of Alcohol/public Intoxication
## 84 Driving Under Influence -2nd Off
## 96 Simple Assault
## 98 Simple Assault
## 104 Failure To Appear (USAO)
## 110 No Permit
## 114 Simple Assault
## 123 Assault With A Dangerous Weapon
## 131 Possess Prohibited Weapon
## 138 Possession Of An Open Container Of Alcohol (poca)
## 161 Poss W/i To Dist A Controlled Substance
## 171 Simple Assault
## 175 Destruction Of Property Less Than $1000
## 194 No Permit
## Arrest_Location_District Offense_Location_District Weekday
## 1 2D 2D Friday
## 2 3D 3D Friday
## 12 4D 4D Friday
## 14 5D 5D Friday
## 24 1D 1D Friday
## 54 3D 3D Friday
## 76 2D 2D Friday
## 84 2D 2D Friday
## 96 2D 2D Friday
## 98 4D 4D Friday
## 104 4D 1D Saturday
## 110 4D 4D Saturday
## 114 2D 2D Saturday
## 123 4D 4D Saturday
## 131 5D 5D Saturday
## 138 3D 3D Saturday
## 161 1D 1D Saturday
## 171 2D 2D Saturday
## 175 3D 3D Saturday
## 194 4D 4D Sunday
# Rebuild the weekday factor on the white-male subset, with levels running
# Monday through Sunday (the same construction used for df_full$Weekday).
df_wm$Weekday <- factor(
  weekdays(df_wm$Arrest_Date),
  levels = as.character(wday(c(2:7, 1), label = TRUE, abbr = FALSE))
)
# Same pattern here as above; a few other aspects are explored further below.
# White-male subset: number of arrest records per Arrest_Hour, printed and
# then drawn as a line coloured by hour.
wm_by_hour <- dplyr::summarise(group_by(df_wm, Arrest_Hour), Total = n())
wm_by_hour
## # A tibble: 24 × 2
## Arrest_Hour Total
## <int> <int>
## 1 0 606
## 2 1 801
## 3 2 813
## 4 3 695
## 5 4 552
## 6 5 329
## 7 6 324
## 8 7 405
## 9 8 424
## 10 9 442
## # … with 14 more rows
ggplot(wm_by_hour, aes(x = Arrest_Hour, y = Total, colour = Arrest_Hour)) +
  geom_line() +
  labs(
    title = "White Males - Crimes By Hour",
    x = "Hour of the Day",
    y = "Total Crimes"
  )
# White-male subset: number of arrest records per day of the month, printed
# and then drawn as a line coloured by day.
wm_by_day <- dplyr::summarise(group_by(df_wm, Day), Total = n())
wm_by_day
## # A tibble: 31 × 2
## Day Total
## <int> <int>
## 1 1 524
## 2 2 404
## 3 3 403
## 4 4 395
## 5 5 405
## 6 6 456
## 7 7 406
## 8 8 377
## 9 9 399
## 10 10 430
## # … with 21 more rows
ggplot(wm_by_day, aes(x = Day, y = Total, colour = Day)) +
  geom_line() +
  labs(
    title = "White Males - Crimes By Day",
    x = "Day of the Month",
    y = "Total Crimes"
  )
# White-male subset: arrest records per weekday, with each weekday's share of
# all rows in df_wm expressed as a percentage.
wm_by_weekday <- df_wm %>%
  group_by(Weekday) %>%
  dplyr::summarise(Total = n()) %>%
  dplyr::mutate(Percent = Total / nrow(df_wm) * 100)
wm_by_weekday
## # A tibble: 7 × 3
## Weekday Total Percent
## <fct> <int> <dbl>
## 1 Monday 1566 12.7
## 2 Tuesday 1465 11.9
## 3 Wednesday 1640 13.4
## 4 Thursday 1736 14.1
## 5 Friday 1877 15.3
## 6 Saturday 2046 16.7
## 7 Sunday 1953 15.9
# One bar per weekday; the legend is hidden because the fill repeats the axis.
ggplot(wm_by_weekday, aes(x = Weekday, y = Total, fill = Weekday)) +
  geom_col() +
  labs(
    title = "White Males - Crimes By Weekday ",
    x = "Day of the Week",
    y = "Count"
  ) +
  theme(legend.position = "none")
# White-male subset: arrest records per calendar month, with each month's
# share of all rows in df_wm expressed as a percentage.
wm_by_month <- df_wm %>%
  group_by(Month) %>%
  dplyr::summarise(Total = n()) %>%
  dplyr::mutate(Percent = Total / nrow(df_wm) * 100)
wm_by_month
## # A tibble: 12 × 3
## Month Total Percent
## <fct> <int> <dbl>
## 1 01 1191 9.70
## 2 02 998 8.13
## 3 03 1125 9.16
## 4 04 922 7.51
## 5 05 1059 8.62
## 6 06 1010 8.22
## 7 07 966 7.86
## 8 08 962 7.83
## 9 09 1023 8.33
## 10 10 1108 9.02
## 11 11 991 8.07
## 12 12 928 7.56
# Bar chart of the monthly totals (legend hidden).
ggplot(wm_by_month, aes(x = Month, y = Total, fill = Month)) +
  geom_col() +
  labs(title = "White Males - Crimes By Month", x = "Month", y = "Count") +
  theme(legend.position = "none")
# Same totals as a single connected line; group = 1 joins the factor levels.
ggplot(wm_by_month, aes(x = Month, y = Total, group = 1)) +
  geom_line()
#### White Men - Yearly Crime Incidents
# Arrest records per year in the white-male subset, with each year's share of
# all rows in df_wm expressed as a percentage.
wm_by_year <- df_wm %>%
  group_by(Arrest_Year) %>%
  dplyr::summarise(Total = n()) %>%
  dplyr::mutate(Percent = Total / nrow(df_wm) * 100)
wm_by_year
## # A tibble: 6 × 3
## Arrest_Year Total Percent
## <fct> <int> <dbl>
## 1 2016 2620 21.3
## 2 2017 2636 21.5
## 3 2018 2298 18.7
## 4 2019 2193 17.9
## 5 2020 1426 11.6
## 6 2021 1110 9.04
# Bar chart of the yearly totals (legend hidden).
ggplot(wm_by_year, aes(x = Arrest_Year, y = Total, fill = Arrest_Year)) +
  geom_col() +
  labs(title = "White Males - Crimes By Year ", x = "Year", y = "Count") +
  theme(legend.position = "none")
# Same totals as a single connected line; group = 1 joins the factor levels.
ggplot(wm_by_year, aes(x = Arrest_Year, y = Total, group = 1)) +
  geom_line()
# White-male subset: arrest records per Arrest_Category, largest first.
wm_by_cat <- arrange(
  dplyr::summarise(group_by(df_wm, Arrest_Category), Total = n()),
  desc(Total)
)
# Show the ten most frequent categories.
wm_by_cat[1:10, ]
## # A tibble: 10 × 2
## Arrest_Category Total
## <fct> <int>
## 1 Simple Assault 2661
## 2 Traffic Violations 1549
## 3 Release Violations/Fugitive 1133
## 4 Driving/Boating While Intoxicated 1045
## 5 Other Crimes 822
## 6 Theft 675
## 7 Narcotics 654
## 8 Liquor Law Violations 562
## 9 Disorderly Conduct 433
## 10 Damage to Property 414
# Horizontal bars for every category, ordered by total, with a y-axis tick
# every 500 records from 0 to 3000.
ggplot(wm_by_cat, aes(x = reorder(Arrest_Category, Total), y = Total)) +
  geom_col() +
  coord_flip() +
  scale_y_continuous(breaks = seq(0, 3000, by = 500)) +
  labs(
    title = "Crimes By Arrest Category",
    x = "Crime Type",
    y = "Number of Incidents"
  )
# White-male subset: arrest records per (Arrest_Year, Arrest_Category) pair.
# The result stays grouped by Arrest_Year, as the summarise() message notes.
wm_by_cat_year <- df_wm %>%
  group_by(Arrest_Year, Arrest_Category) %>%
  dplyr::summarise(Total = n())
## `summarise()` has grouped output by 'Arrest_Year'. You can override using the
## `.groups` argument.
wm_by_cat_year[1:10, ]
## # A tibble: 10 × 3
## # Groups: Arrest_Year [1]
## Arrest_Year Arrest_Category Total
## <fct> <fct> <int>
## 1 2016 Aggravated Assault 23
## 2 2016 Assault on a Police Officer 42
## 3 2016 Assault with a Dangerous Weapon 73
## 4 2016 Burglary 25
## 5 2016 Damage to Property 98
## 6 2016 Disorderly Conduct 83
## 7 2016 Driving/Boating While Intoxicated 206
## 8 2016 Fraud and Financial Crimes 11
## 9 2016 Homicide 2
## 10 2016 Kidnapping 4
# Stacked horizontal bars: one bar per category, one segment per year.
# The bar length is the SUM of the yearly totals, so categories are ordered
# with FUN = sum. reorder()'s default (mean) would misplace any category that
# is absent from some years, because its mean is taken over fewer rows.
ggplot(
  wm_by_cat_year,
  aes(reorder(Arrest_Category, Total, FUN = sum), Total, fill = Arrest_Year)
) +
  geom_bar(stat = "identity") +
  scale_y_continuous(breaks = seq(0, 3000, 500)) +
  coord_flip() +
  ggtitle("Crimes By Code and Year") +
  xlab("Crime Text Code") +
  ylab("Total Crimes")
# Inspect the district labels in the white-male subset. Besides 1D-7D, the
# output shows several "unknown"-style labels: a blank value, UNKNOWN and NA
# for the arrest district, and #N/A, Unk and UNKNOWN for the offense district.
unique(df_wm[["Arrest_Location_District"]])
## [1] 2D 3D 4D 5D 1D 7D 6D UNKNOWN <NA>
## [10]
## Levels: 1D 2D 3D 4D 5D 6D 7D UNKNOWN
table(df_wm[["Arrest_Location_District"]])
##
## 1D 2D 3D 4D 5D 6D 7D UNKNOWN
## 22 1858 3056 2556 2817 1231 437 260 29
unique(df_wm[["Offense_Location_District"]])
## [1] 2D 3D 4D 5D 1D 7D 6D #N/A UNKNOWN
## [10] Unk
## Levels: #N/A 1D 2D 3D 4D 5D 6D 7D Unk UNKNOWN
table(df_wm[["Offense_Location_District"]])
##
## #N/A 1D 2D 3D 4D 5D 6D 7D Unk UNKNOWN
## 12 2044 3103 2532 2720 1195 413 238 16 10
### Drop the unknown districts here; they are few.
# Arrest records per arrest-location district, largest first.
wm_by_ALD <- df_wm %>%
  group_by(Arrest_Location_District) %>%
  dplyr::summarise(Total = n()) %>%
  dplyr::arrange(desc(Total))
# Keep only the numbered districts 1D-7D. Filtering on the label (instead of
# taking the first seven rows) stays correct even if an unknown-style label
# ever becomes more frequent than a real district. grepl() returns FALSE for
# NA, so missing districts are dropped as well.
wm_by_ALD2 <- wm_by_ALD[
  grepl("^[1-7]D$", wm_by_ALD$Arrest_Location_District),
]
# Same summary and filter for the offense-location district.
wm_by_OLD <- df_wm %>%
  group_by(Offense_Location_District) %>%
  dplyr::summarise(Total = n()) %>%
  dplyr::arrange(desc(Total))
wm_by_OLD2 <- wm_by_OLD[
  grepl("^[1-7]D$", wm_by_OLD$Offense_Location_District),
]
# Bars per arrest-location district, tallest first (reorder on -Total).
ggplot(
  wm_by_ALD2,
  aes(x = reorder(Arrest_Location_District, -Total), y = Total)
) +
  geom_col() +
  labs(
    title = "Crimes by Arrest Location District",
    x = "Location District",
    y = "Total Crimes"
  )
# Bars per offense-location district, tallest first.
ggplot(
  wm_by_OLD2,
  aes(x = reorder(Offense_Location_District, -Total), y = Total)
) +
  geom_col() +
  labs(
    title = "Crimes by Offense Location District",
    x = "Location District",
    y = "Total Crimes"
  )
# top 5 crimes in each district
#ALD_dc_top7 <- wm_by_ALD$Arrest_Location_District[1:5]
#ALD_top7_dc <- subset(df_wm, Arrest_Location_District %in% wm_by_ALD$Arrest_Location_District[1:5])
#ALD_top7_dc$Arrest_Location_District <- factor(ALD_top7_dc$Arrest_Location_District)
#ggplot(ALD_top7_dc, aes(Arrest_Category, fill = Arrest_Location_District)) +
#geom_bar(position = "dodge") +
#ggtitle("Crimes by District Police HeadQuarters - Top 5") +
#xlab("Police HQ") +
#ylab("Total Crimes")
# Top arrest category per ARREST location district.
# Count records per (district, category), then keep the single largest
# category within each district. slice_max(with_ties = FALSE) guarantees one
# row per district; the superseded top_n() kept ties, which would put two
# stacked segments on one bar in the plot. Rows are finally sorted by Total,
# largest first.
ALD_by_crime <- df_wm %>%
  group_by(Arrest_Location_District, Arrest_Category) %>%
  dplyr::summarise(Total = n(), .groups = "drop_last") %>%
  slice_max(Total, n = 1, with_ties = FALSE) %>%
  ungroup() %>%
  arrange(desc(Total))
# Keep only the numbered districts 1D-7D. Filtering on the label, rather than
# taking the first seven rows, does not depend on the unknown-style labels
# happening to have the smallest totals.
ALD_by_crime1 <- ALD_by_crime[
  grepl("^[1-7]D$", ALD_by_crime$Arrest_Location_District),
]
#dc_by_crime <- as.data.frame(dc_by_crime)
#dc_by_crime$Dc_Dist <- factor(dc_by_crime$Dc_Dist)
#dc_by_crime$Text_General_Code <- factor(dc_by_crime$Text_General_Code)
# One bar per arrest-location district, filled by that district's top
# arrest category.
ggplot(
  ALD_by_crime1,
  aes(x = Arrest_Location_District, y = Total, fill = Arrest_Category)
) +
  geom_col() +
  labs(
    title = "Top Crime by Arrest Location District",
    x = "Location District",
    y = "Total"
  )
# Top arrest category per OFFENSE location district.
# Count records per (district, category), then keep the single largest
# category within each district. slice_max(with_ties = FALSE) guarantees one
# row per district; the superseded top_n() kept ties, which would put two
# stacked segments on one bar in the plot. Rows are finally sorted by Total,
# largest first.
OLD_by_crime <- df_wm %>%
  group_by(Offense_Location_District, Arrest_Category) %>%
  dplyr::summarise(Total = n(), .groups = "drop_last") %>%
  slice_max(Total, n = 1, with_ties = FALSE) %>%
  ungroup() %>%
  arrange(desc(Total))
# Keep only the numbered districts 1D-7D. Filtering on the label, rather than
# taking the first seven rows, does not depend on the unknown-style labels
# (#N/A, Unk, UNKNOWN) happening to have the smallest totals.
OLD_by_crime1 <- OLD_by_crime[
  grepl("^[1-7]D$", OLD_by_crime$Offense_Location_District),
]
#dc_by_crime <- as.data.frame(dc_by_crime)
#dc_by_crime$Dc_Dist <- factor(dc_by_crime$Dc_Dist)
#dc_by_crime$Text_General_Code <- factor(dc_by_crime$Text_General_Code)
# One bar per offense-location district, filled by that district's top
# arrest category.
ggplot(
  OLD_by_crime1,
  aes(x = Offense_Location_District, y = Total, fill = Arrest_Category)
) +
  geom_col() +
  labs(
    title = "Top Crime by Offense Location District",
    x = "Location District",
    y = "Total"
  )